From 628c2a958eb5ed993b4430307f4267ef24df97b0 Mon Sep 17 00:00:00 2001 From: "kaf24@scramble.cl.cam.ac.uk" Date: Thu, 15 Jan 2004 19:13:38 +0000 Subject: [PATCH] bitkeeper revision 1.674 (4006e662w0P_G2ebTRx5hqQilMAXTw) rbtree.h, rbtree.c: new file Many files: Cleanups to new VBD mechanisms. --- .rootkeys | 2 + xen/common/domain.c | 26 + xen/common/rbtree.c | 295 ++++++++++ xen/drivers/block/xen_block.c | 22 +- xen/drivers/block/xen_vbd.c | 458 ++++++++++------ xen/drivers/ide/ide-xeno.c | 66 +-- xen/drivers/scsi/sd.c | 51 +- xen/include/xeno/multiboot.h | 10 +- xen/include/xeno/rbtree.h | 134 +++++ xen/include/xeno/sched.h | 5 +- xen/include/xeno/vbd.h | 33 +- .../arch/xeno/drivers/block/xl_block.c | 164 ++---- .../arch/xeno/drivers/block/xl_block.h | 13 +- .../arch/xeno/drivers/block/xl_vbd.c | 513 +++++++----------- .../include/asm-xeno/hypervisor.h | 8 +- 15 files changed, 1089 insertions(+), 711 deletions(-) create mode 100644 xen/common/rbtree.c create mode 100644 xen/include/xeno/rbtree.h diff --git a/.rootkeys b/.rootkeys index 378674e672..2ee4664034 100644 --- a/.rootkeys +++ b/.rootkeys @@ -134,6 +134,7 @@ 3ddb79bdN51qpRC-6bOH-v5hl_AK6A xen/common/network.c 3ddb79bdD4SLmmdMD7yLW5HcUWucXw xen/common/page_alloc.c 3e54c38dkHAev597bPr71-hGzTdocg xen/common/perfc.c +4006e659i9j-doVxY7DKOGU4XVin1Q xen/common/rbtree.c 3ddb79bdHqdQpATqC0rmUZNbsb6L6A xen/common/resource.c 3e397e6619PgAfBbw2XFbXkewvUWgw xen/common/schedule.c 3ddb79bdB9RNMnkQnUyZ5C9hhMSQQw xen/common/slab.c @@ -461,6 +462,7 @@ 3e54c38dlSCVdyVM4PKcrSfzLLxWUQ xen/include/xeno/perfc.h 3e54c38de9SUSYSAwxDf_DwkpAnQFA xen/include/xeno/perfc_defn.h 3ddb79c04nQVR3EYM5L4zxDV_MCo1g xen/include/xeno/prefetch.h +4006e65fWMwLqcocgik6wbF0Eeh0Og xen/include/xeno/rbtree.h 3e4540ccU1sgCx8seIMGlahmMfv7yQ xen/include/xeno/reboot.h 3ddb79c0LzqqS0LhAQ50ekgj4oGl7Q xen/include/xeno/sched.h 3ddb79c0VDeD-Oft5eNfMneTU3D1dQ xen/include/xeno/skbuff.h diff --git a/xen/common/domain.c b/xen/common/domain.c index 
147d9c0f97..315539e507 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -542,6 +542,11 @@ int setup_guestos(struct task_struct *p, dom0_createdomain_t *params, l1_pgentry_t *l1tab = NULL, *l1start = NULL; struct pfn_info *page = NULL; + extern void ide_probe_devices(xen_disk_info_t *); + extern void scsi_probe_devices(xen_disk_info_t *); + xen_disk_info_t xdi; + xen_disk_t *xd; + /* Sanity! */ if ( p->domain != 0 ) BUG(); if ( (p->flags & PF_CONSTRUCTED) ) BUG(); @@ -771,6 +776,27 @@ int setup_guestos(struct task_struct *p, dom0_createdomain_t *params, write_cr3_counted(pagetable_val(current->mm.pagetable)); __sti(); + /* DOM0 gets access to all real block devices. */ +#define MAX_REAL_DISKS 256 + xd = kmalloc(MAX_REAL_DISKS * sizeof(xen_disk_t), GFP_KERNEL); + xdi.max = MAX_REAL_DISKS; + xdi.count = 0; + xdi.disks = xd; + ide_probe_devices(&xdi); + scsi_probe_devices(&xdi); + for ( i = 0; i < xdi.count; i++ ) + { + xen_extent_t e; + e.device = xd[i].device; + e.start_sector = 0; + e.nr_sectors = xd[i].capacity; + if ( (__vbd_create(p, xd[i].device, VBD_MODE_R|VBD_MODE_W, + xd[i].info) != 0) || + (__vbd_grow(p, xd[i].device, &e) != 0) ) + BUG(); + } + kfree(xd); + p->flags |= PF_CONSTRUCTED; new_thread(p, diff --git a/xen/common/rbtree.c b/xen/common/rbtree.c new file mode 100644 index 0000000000..dbd35fc31d --- /dev/null +++ b/xen/common/rbtree.c @@ -0,0 +1,295 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/lib/rbtree.c +*/ + +#include + +static void __rb_rotate_left(rb_node_t * node, rb_root_t * root) +{ + rb_node_t * right = node->rb_right; + + if ((node->rb_right = right->rb_left)) + right->rb_left->rb_parent = node; + right->rb_left = node; + + if ((right->rb_parent = node->rb_parent)) + { + if (node == node->rb_parent->rb_left) + node->rb_parent->rb_left = right; + else + node->rb_parent->rb_right = right; + } + else + root->rb_node = right; + node->rb_parent = right; +} + +static void __rb_rotate_right(rb_node_t * node, rb_root_t * root) +{ + rb_node_t * left = node->rb_left; + + if ((node->rb_left = left->rb_right)) + left->rb_right->rb_parent = node; + left->rb_right = node; + + if ((left->rb_parent = node->rb_parent)) + { + if (node == node->rb_parent->rb_right) + node->rb_parent->rb_right = left; + else + node->rb_parent->rb_left = left; + } + else + root->rb_node = left; + node->rb_parent = left; +} + +void rb_insert_color(rb_node_t * node, rb_root_t * root) +{ + rb_node_t * parent, * gparent; + + while ((parent = node->rb_parent) && parent->rb_color == RB_RED) + { + gparent = parent->rb_parent; + + if (parent == gparent->rb_left) + { + { + register rb_node_t * uncle = gparent->rb_right; + if (uncle && uncle->rb_color == RB_RED) + { + uncle->rb_color = RB_BLACK; + parent->rb_color = RB_BLACK; + gparent->rb_color = RB_RED; + node = gparent; + continue; + } + } + + if (parent->rb_right == node) + { + register rb_node_t * tmp; + __rb_rotate_left(parent, root); + tmp = parent; + parent = node; + node = tmp; + } + + parent->rb_color = RB_BLACK; + gparent->rb_color = RB_RED; + __rb_rotate_right(gparent, root); + } else { + { + register rb_node_t * uncle = gparent->rb_left; + if (uncle && uncle->rb_color == RB_RED) + { + uncle->rb_color = RB_BLACK; + 
parent->rb_color = RB_BLACK; + gparent->rb_color = RB_RED; + node = gparent; + continue; + } + } + + if (parent->rb_left == node) + { + register rb_node_t * tmp; + __rb_rotate_right(parent, root); + tmp = parent; + parent = node; + node = tmp; + } + + parent->rb_color = RB_BLACK; + gparent->rb_color = RB_RED; + __rb_rotate_left(gparent, root); + } + } + + root->rb_node->rb_color = RB_BLACK; +} +EXPORT_SYMBOL(rb_insert_color); + +static void __rb_erase_color(rb_node_t * node, rb_node_t * parent, + rb_root_t * root) +{ + rb_node_t * other; + + while ((!node || node->rb_color == RB_BLACK) && node != root->rb_node) + { + if (parent->rb_left == node) + { + other = parent->rb_right; + if (other->rb_color == RB_RED) + { + other->rb_color = RB_BLACK; + parent->rb_color = RB_RED; + __rb_rotate_left(parent, root); + other = parent->rb_right; + } + if ((!other->rb_left || + other->rb_left->rb_color == RB_BLACK) + && (!other->rb_right || + other->rb_right->rb_color == RB_BLACK)) + { + other->rb_color = RB_RED; + node = parent; + parent = node->rb_parent; + } + else + { + if (!other->rb_right || + other->rb_right->rb_color == RB_BLACK) + { + register rb_node_t * o_left; + if ((o_left = other->rb_left)) + o_left->rb_color = RB_BLACK; + other->rb_color = RB_RED; + __rb_rotate_right(other, root); + other = parent->rb_right; + } + other->rb_color = parent->rb_color; + parent->rb_color = RB_BLACK; + if (other->rb_right) + other->rb_right->rb_color = RB_BLACK; + __rb_rotate_left(parent, root); + node = root->rb_node; + break; + } + } + else + { + other = parent->rb_left; + if (other->rb_color == RB_RED) + { + other->rb_color = RB_BLACK; + parent->rb_color = RB_RED; + __rb_rotate_right(parent, root); + other = parent->rb_left; + } + if ((!other->rb_left || + other->rb_left->rb_color == RB_BLACK) + && (!other->rb_right || + other->rb_right->rb_color == RB_BLACK)) + { + other->rb_color = RB_RED; + node = parent; + parent = node->rb_parent; + } + else + { + if (!other->rb_left || + 
other->rb_left->rb_color == RB_BLACK) + { + register rb_node_t * o_right; + if ((o_right = other->rb_right)) + o_right->rb_color = RB_BLACK; + other->rb_color = RB_RED; + __rb_rotate_left(other, root); + other = parent->rb_left; + } + other->rb_color = parent->rb_color; + parent->rb_color = RB_BLACK; + if (other->rb_left) + other->rb_left->rb_color = RB_BLACK; + __rb_rotate_right(parent, root); + node = root->rb_node; + break; + } + } + } + if (node) + node->rb_color = RB_BLACK; +} + +void rb_erase(rb_node_t * node, rb_root_t * root) +{ + rb_node_t * child, * parent; + int color; + + if (!node->rb_left) + child = node->rb_right; + else if (!node->rb_right) + child = node->rb_left; + else + { + rb_node_t * old = node, * left; + + node = node->rb_right; + while ((left = node->rb_left)) + node = left; + child = node->rb_right; + parent = node->rb_parent; + color = node->rb_color; + + if (child) + child->rb_parent = parent; + if (parent) + { + if (parent->rb_left == node) + parent->rb_left = child; + else + parent->rb_right = child; + } + else + root->rb_node = child; + + if (node->rb_parent == old) + parent = node; + node->rb_parent = old->rb_parent; + node->rb_color = old->rb_color; + node->rb_right = old->rb_right; + node->rb_left = old->rb_left; + + if (old->rb_parent) + { + if (old->rb_parent->rb_left == old) + old->rb_parent->rb_left = node; + else + old->rb_parent->rb_right = node; + } else + root->rb_node = node; + + old->rb_left->rb_parent = node; + if (old->rb_right) + old->rb_right->rb_parent = node; + goto color; + } + + parent = node->rb_parent; + color = node->rb_color; + + if (child) + child->rb_parent = parent; + if (parent) + { + if (parent->rb_left == node) + parent->rb_left = child; + else + parent->rb_right = child; + } + else + root->rb_node = child; + + color: + if (color == RB_BLACK) + __rb_erase_color(child, parent, root); +} +EXPORT_SYMBOL(rb_erase); diff --git a/xen/drivers/block/xen_block.c b/xen/drivers/block/xen_block.c index 
b3aa086f06..6b993551e5 100644 --- a/xen/drivers/block/xen_block.c +++ b/xen/drivers/block/xen_block.c @@ -287,7 +287,7 @@ long do_block_io_op(block_io_op_t *u_block_io_op) /* query VBD information for self or others (or all) */ if ( (ret = vbd_probe(&op.u.probe_params)) == 0 ) copy_to_user(u_block_io_op, &op, sizeof(op)); - break; + break; case BLOCK_IO_OP_VBD_INFO: /* query information about a particular VBD */ @@ -449,21 +449,15 @@ static void dispatch_rw_block_io(struct task_struct *p, phys_seg[nr_psegs].nr_sects = nr_sects; /* Translate the request into the relevant 'physical device' */ - new_segs = vbd_translate(&phys_seg[nr_psegs], p, operation); - - /* If it fails we bail (unless the caller is privileged). */ + new_segs = vbd_translate(&phys_seg[nr_psegs], p, operation); if ( new_segs < 0 ) { - if ( unlikely(new_segs != -ENODEV) || unlikely(!IS_PRIV(p)) ) - { - DPRINTK("access denied: %s of [%ld,%ld] on dev=%04x\n", - operation == READ ? "read" : "write", - req->sector_number + tot_sects, - req->sector_number + tot_sects + nr_sects, - req->device); - goto bad_descriptor; - } - new_segs = 1; + DPRINTK("access denied: %s of [%ld,%ld] on dev=%04x\n", + operation == READ ? "read" : "write", + req->sector_number + tot_sects, + req->sector_number + tot_sects + nr_sects, + req->device); + goto bad_descriptor; } nr_psegs += new_segs; diff --git a/xen/drivers/block/xen_vbd.c b/xen/drivers/block/xen_vbd.c index e5ffdfa016..84a5539154 100644 --- a/xen/drivers/block/xen_vbd.c +++ b/xen/drivers/block/xen_vbd.c @@ -1,5 +1,9 @@ -/* - * xen_vbd.c : routines for managing virtual block devices +/****************************************************************************** + * xen_vbd.c + * + * Routines for managing virtual block devices. + * + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand */ #include @@ -17,111 +21,117 @@ #include #include -/* -** XXX SMH: the below probe functions /append/ their info to the -** xdi array; i.e. 
they assume that all earlier slots are correctly -** filled, and that xdi->count points to the first free entry in -** the array. All kinda gross but it'll do for now. -*/ -extern int ide_probe_devices(xen_disk_info_t *xdi); -extern int scsi_probe_devices(xen_disk_info_t *xdi); - -/* XXX SMH: crappy 'hash function' .. fix when care. */ -#define HSH(_x) ((_x) & (VBD_HTAB_SZ - 1)) - - -/* -** Create a new VBD; all this involves is adding an entry to the domain's -** vbd hash table; caller must be privileged. -*/ -long vbd_create(vbd_create_t *create) +long __vbd_create(struct task_struct *p, + unsigned short vdevice, + unsigned char mode, + unsigned char type) { - struct task_struct *p; - vbd_t *new_vbd, **pv; + vbd_t *vbd; + rb_node_t **rb_p, *rb_parent = NULL; long ret = 0; unsigned long cpu_mask; - if ( unlikely(!IS_PRIV(current)) ) - return -EPERM; - - if ( unlikely((p = find_domain_by_id(create->domain)) == NULL) ) - { - DPRINTK("vbd_create attempted for non-existent domain %d\n", - create->domain); - return -EINVAL; - } - spin_lock(&p->vbd_lock); - for ( pv = &p->vbdtab[HSH(create->vdevice)]; - *pv != NULL; - pv = &(*pv)->next ) + rb_p = &p->vbd_rb.rb_node; + while ( *rb_p != NULL ) { - if ( unlikely((*pv)->vdevice == create->vdevice) ) + rb_parent = *rb_p; + vbd = rb_entry(rb_parent, vbd_t, rb); + if ( vdevice < vbd->vdevice ) + { + rb_p = &rb_parent->rb_left; + } + else if ( vdevice > vbd->vdevice ) + { + rb_p = &rb_parent->rb_right; + } + else { DPRINTK("vbd_create attempted for already existing vbd\n"); ret = -EINVAL; goto out; } - if ( (*pv)->vdevice > create->vdevice ) - break; } - if ( unlikely((new_vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) ) + if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) ) { DPRINTK("vbd_create: out of memory\n"); ret = -ENOMEM; goto out; } - new_vbd->vdevice = create->vdevice; - new_vbd->mode = create->mode; - new_vbd->extents = NULL; - new_vbd->next = *pv; + vbd->vdevice = vdevice; + vbd->mode = mode; + 
+ vbd->type = type; + vbd->extents = NULL; - *pv = new_vbd; + rb_link_node(&vbd->rb, rb_parent, rb_p); + rb_insert_color(&vbd->rb, &p->vbd_rb); cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD); guest_event_notify(cpu_mask); out: spin_unlock(&p->vbd_lock); - put_task_struct(p); return ret; } -/* Grow a VBD by appending a new extent. Fails if the VBD doesn't exist. */ -long vbd_grow(vbd_grow_t *grow) +long vbd_create(vbd_create_t *create) { - struct task_struct *p; - xen_extent_le_t **px, *x; - vbd_t *v; - long ret = 0; - unsigned long cpu_mask; + struct task_struct *p; + long rc; if ( unlikely(!IS_PRIV(current)) ) - return -EPERM; + return -EPERM; - if ( unlikely((p = find_domain_by_id(grow->domain)) == NULL) ) + if ( unlikely((p = find_domain_by_id(create->domain)) == NULL) ) { - DPRINTK("vbd_grow: attempted for non-existent domain %d\n", - grow->domain); + DPRINTK("vbd_create attempted for non-existent domain %d\n", + create->domain); return -EINVAL; } + rc = __vbd_create(p, create->vdevice, create->mode, + XD_TYPE_DISK | XD_FLAG_VIRT); + + put_task_struct(p); + + return rc; +} + + +long __vbd_grow(struct task_struct *p, + unsigned short vdevice, + xen_extent_t *extent) +{ + xen_extent_le_t **px, *x; + vbd_t *vbd = NULL; + rb_node_t *rb; + long ret = 0; + unsigned long cpu_mask; + spin_lock(&p->vbd_lock); - for ( v = p->vbdtab[HSH(grow->vdevice)]; v != NULL; v = v->next ) - if ( v->vdevice == grow->vdevice ) - break; + rb = p->vbd_rb.rb_node; + while ( rb != NULL ) + { + vbd = rb_entry(rb, vbd_t, rb); + if ( vdevice < vbd->vdevice ) + rb = rb->rb_left; + else if ( vdevice > vbd->vdevice ) + rb = rb->rb_right; + else + break; + } - if ( unlikely(v == NULL) ) + if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) ) { DPRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n"); ret = -EINVAL; - goto out; - } + goto out; + } if ( unlikely((x = kmalloc(sizeof(xen_extent_le_t), GFP_KERNEL)) == NULL) ) { @@ -130,12 +140,12 @@ long vbd_grow(vbd_grow_t *grow)
goto out; } - x->extent.device = grow->extent.device; - x->extent.start_sector = grow->extent.start_sector; - x->extent.nr_sectors = grow->extent.nr_sectors; + x->extent.device = extent->device; + x->extent.start_sector = extent->start_sector; + x->extent.nr_sectors = extent->nr_sectors; x->next = (xen_extent_le_t *)NULL; - for ( px = &v->extents; *px != NULL; px = &(*px)->next ) + for ( px = &vbd->extents; *px != NULL; px = &(*px)->next ) continue; *px = x; @@ -145,16 +155,40 @@ long vbd_grow(vbd_grow_t *grow) out: spin_unlock(&p->vbd_lock); - put_task_struct(p); return ret; } +/* Grow a VBD by appending a new extent. Fails if the VBD doesn't exist. */ +long vbd_grow(vbd_grow_t *grow) +{ + struct task_struct *p; + long rc; + + if ( unlikely(!IS_PRIV(current)) ) + return -EPERM; + + if ( unlikely((p = find_domain_by_id(grow->domain)) == NULL) ) + { + DPRINTK("vbd_grow: attempted for non-existent domain %d\n", + grow->domain); + return -EINVAL; + } + + rc = __vbd_grow(p, grow->vdevice, &grow->extent); + + put_task_struct(p); + + return rc; +} + + long vbd_shrink(vbd_shrink_t *shrink) { struct task_struct *p; xen_extent_le_t **px, *x; - vbd_t *v; + vbd_t *vbd = NULL; + rb_node_t *rb; long ret = 0; unsigned long cpu_mask; @@ -170,11 +204,21 @@ long vbd_shrink(vbd_shrink_t *shrink) spin_lock(&p->vbd_lock); - for ( v = p->vbdtab[HSH(shrink->vdevice)]; v != NULL; v = v->next ) - if ( v->vdevice == shrink->vdevice ) - break; + rb = p->vbd_rb.rb_node; + while ( rb != NULL ) + { + vbd = rb_entry(rb, vbd_t, rb); + if ( shrink->vdevice < vbd->vdevice ) + rb = rb->rb_left; + else if ( shrink->vdevice > vbd->vdevice ) + rb = rb->rb_right; + else + break; + } - if ( unlikely(v == NULL) || unlikely(v->extents == NULL) ) + if ( unlikely(vbd == NULL) || + unlikely(vbd->vdevice != shrink->vdevice) || + unlikely(vbd->extents == NULL) ) { DPRINTK("vbd_shrink: attempt to remove non-existent extent.\n"); ret = -EINVAL; @@ -182,7 +226,7 @@ long vbd_shrink(vbd_shrink_t *shrink) } /* Find 
the last extent. We now know that there is at least one. */ - for ( px = &v->extents; (*px)->next != NULL; px = &(*px)->next ) + for ( px = &vbd->extents; (*px)->next != NULL; px = &(*px)->next ) continue; x = *px; @@ -204,7 +248,8 @@ long vbd_setextents(vbd_setextents_t *setextents) struct task_struct *p; xen_extent_t e; xen_extent_le_t *new_extents, *x, *t; - vbd_t *v; + vbd_t *vbd = NULL; + rb_node_t *rb; int i; long ret = 0; unsigned long cpu_mask; @@ -221,11 +266,20 @@ long vbd_setextents(vbd_setextents_t *setextents) spin_lock(&p->vbd_lock); - for ( v = p->vbdtab[HSH(setextents->vdevice)]; v != NULL; v = v->next ) - if ( v->vdevice == setextents->vdevice ) - break; + rb = p->vbd_rb.rb_node; + while ( rb != NULL ) + { + vbd = rb_entry(rb, vbd_t, rb); + if ( setextents->vdevice < vbd->vdevice ) + rb = rb->rb_left; + else if ( setextents->vdevice > vbd->vdevice ) + rb = rb->rb_right; + else + break; + } - if ( unlikely(v == NULL) ) + if ( unlikely(vbd == NULL) || + unlikely(vbd->vdevice != setextents->vdevice) ) { DPRINTK("vbd_setextents: attempt to modify non-existent VBD.\n"); ret = -EINVAL; @@ -260,14 +314,14 @@ long vbd_setextents(vbd_setextents_t *setextents) } /* Delete the old extent list _after_ successfully creating the new. */ - for ( x = v->extents; x != NULL; x = t ) + for ( x = vbd->extents; x != NULL; x = t ) { t = x->next; kfree(x); } /* Make the new list visible. 
*/ - v->extents = new_extents; + vbd->extents = new_extents; cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD); guest_event_notify(cpu_mask); @@ -291,7 +345,8 @@ long vbd_setextents(vbd_setextents_t *setextents) long vbd_delete(vbd_delete_t *delete) { struct task_struct *p; - vbd_t *v, **pv; + vbd_t *vbd; + rb_node_t *rb; xen_extent_le_t *x, *t; unsigned long cpu_mask; @@ -307,14 +362,18 @@ long vbd_delete(vbd_delete_t *delete) spin_lock(&p->vbd_lock); - for ( pv = &p->vbdtab[HSH(delete->vdevice)]; - *pv != NULL; - pv = &(*pv)->next ) + rb = p->vbd_rb.rb_node; + while ( rb != NULL ) { - if ( (*pv)->vdevice == delete->vdevice ) + vbd = rb_entry(rb, vbd_t, rb); + if ( delete->vdevice < vbd->vdevice ) + rb = rb->rb_left; + else if ( delete->vdevice > vbd->vdevice ) + rb = rb->rb_right; + else goto found; } - + DPRINTK("vbd_delete attempted for non-existing VBD.\n"); spin_unlock(&p->vbd_lock); @@ -322,10 +381,9 @@ long vbd_delete(vbd_delete_t *delete) return -EINVAL; found: - v = *pv; - *pv = v->next; - x = v->extents; - kfree(v); + rb_erase(rb, &p->vbd_rb); + x = vbd->extents; + kfree(vbd); while ( x != NULL ) { @@ -345,28 +403,27 @@ long vbd_delete(vbd_delete_t *delete) void destroy_all_vbds(struct task_struct *p) { - int i; - vbd_t *v; + vbd_t *vbd; + rb_node_t *rb; xen_extent_le_t *x, *t; unsigned long cpu_mask; spin_lock(&p->vbd_lock); - for ( i = 0; i < VBD_HTAB_SZ; i++ ) + + while ( (rb = p->vbd_rb.rb_node) != NULL ) { - while ( (v = p->vbdtab[i]) != NULL ) + vbd = rb_entry(rb, vbd_t, rb); + + rb_erase(rb, &p->vbd_rb); + x = vbd->extents; + kfree(vbd); + + while ( x != NULL ) { - p->vbdtab[i] = v->next; - - x = v->extents; - kfree(v); - - while ( x != NULL ) - { - t = x->next; - kfree(x); - x = t; - } - } + t = x->next; + kfree(x); + x = t; + } } cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD); @@ -376,68 +433,100 @@ void destroy_all_vbds(struct task_struct *p) } -/* - * vbd_probe_devices: - * - * add the virtual block devices for this domain to a xen_disk_info_t; - 
* we assume xdi->count points to the first unused place in the array. - */ -static int vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p) +static int vbd_probe_single(xen_disk_info_t *xdi, + vbd_t *vbd, + struct task_struct *p) { xen_extent_le_t *x; xen_disk_t cur_disk; - vbd_t *v; - int i; - spin_lock(&p->vbd_lock); + if ( xdi->count == xdi->max ) + { + DPRINTK("vbd_probe_devices: out of space for probe.\n"); + return -ENOMEM; + } - for ( i = 0; i < VBD_HTAB_SZ; i++ ) + cur_disk.device = vbd->vdevice; + cur_disk.info = vbd->type; + if ( !VBD_CAN_WRITE(vbd) ) + cur_disk.info |= XD_FLAG_RO; + cur_disk.capacity = 0 ; + for ( x = vbd->extents; x != NULL; x = x->next ) + cur_disk.capacity += x->extent.nr_sectors; + cur_disk.domain = p->domain; + + /* Now copy into relevant part of user-space buffer */ + if( copy_to_user(&xdi->disks[xdi->count], + &cur_disk, + sizeof(xen_disk_t)) ) { - for ( v = p->vbdtab[i]; v != NULL; v = v->next ) - { - if ( xdi->count == xdi->max ) - { - DPRINTK("vbd_probe_devices: out of space for probe.\n"); - spin_unlock(&p->vbd_lock); - return -ENOMEM; - } - - cur_disk.device = v->vdevice; - cur_disk.info = XD_FLAG_VIRT | XD_TYPE_DISK; - if ( !VBD_CAN_WRITE(v) ) - cur_disk.info |= XD_FLAG_RO; - cur_disk.capacity = 0 ; - for ( x = v->extents; x != NULL; x = x->next ) - cur_disk.capacity += x->extent.nr_sectors; - cur_disk.domain = p->domain; - - /* Now copy into relevant part of user-space buffer */ - if( copy_to_user(&xdi->disks[xdi->count], - &cur_disk, - sizeof(xen_disk_t)) ) - { - DPRINTK("vbd_probe_devices: copy_to_user failed\n"); - spin_unlock(&p->vbd_lock); - return -EFAULT; - } + DPRINTK("vbd_probe_devices: copy_to_user failed\n"); + return -EFAULT; + } - xdi->count++; + xdi->count++; + + return 0; +} + + +static int vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p) +{ + int rc = 0; + rb_node_t *rb; + + spin_lock(&p->vbd_lock); + + if ( (rb = p->vbd_rb.rb_node) == NULL ) + goto out; + + new_subtree: + /* STEP 1. 
Find least node (it'll be left-most). */ + while ( rb->rb_left != NULL ) + rb = rb->rb_left; + + for ( ; ; ) + { + /* STEP 2. Dealt with left subtree. Now process current node. */ + if ( (rc = vbd_probe_single(xdi, rb_entry(rb, vbd_t, rb), p)) != 0 ) + goto out; + + /* STEP 3. Process right subtree, if any. */ + if ( rb->rb_right != NULL ) + { + rb = rb->rb_right; + goto new_subtree; } - } + /* STEP 4. Done both subtrees. Head back through ancestors. */ + for ( ; ; ) + { + /* We're done when we get back to the root node. */ + if ( rb->rb_parent == NULL ) + goto out; + /* If we are left of parent, then parent is next to process. */ + if ( rb->rb_parent->rb_left == rb ) + break; + /* If we are right of parent, then we climb to grandparent. */ + rb = rb->rb_parent; + } + + rb = rb->rb_parent; + } + + out: spin_unlock(&p->vbd_lock); - return 0; + return rc; } /* -** Return information about the VBDs available for a given domain, -** or for all domains; in the general case the 'domain' argument -** will be 0 which means "information about the caller"; otherwise -** the 'domain' argument will specify either a given domain, or -** all domains ("VBD_PROBE_ALL") -- both of these cases require the -** caller to be privileged. -*/ + * Return information about the VBDs available for a given domain, or for all + * domains; in the general case the 'domain' argument will be 0 which means + * "information about the caller"; otherwise the 'domain' argument will + * specify either a given domain, or all domains ("VBD_PROBE_ALL") -- both of + * these cases require the caller to be privileged. + */ long vbd_probe(vbd_probe_t *probe) { struct task_struct *p = NULL; @@ -446,7 +535,7 @@ long vbd_probe(vbd_probe_t *probe) if ( probe->domain != 0 ) { - /* We can only probe for ourselves unless we're privileged. */ + /* We can only probe for ourselves (unless we're privileged).
*/ if( (probe->domain != current->domain) && !IS_PRIV(current) ) return -EPERM; @@ -465,14 +554,6 @@ long vbd_probe(vbd_probe_t *probe) get_task_struct(p); /* to mirror final put_task_struct */ } - if ( (probe->domain == VBD_PROBE_ALL) || IS_PRIV(p) ) - { - /* Privileged domains always get access to the 'real' devices. */ - if ( ((ret = ide_probe_devices(&probe->xdi)) != 0) || - ((ret = scsi_probe_devices(&probe->xdi)) != 0) ) - goto out; - } - if ( probe->domain == VBD_PROBE_ALL ) { read_lock_irqsave(&tasklist_lock, flags); @@ -507,7 +588,8 @@ long vbd_info(vbd_info_t *info) struct task_struct *p; xen_extent_le_t *x; xen_extent_t *extents; - vbd_t *v; + vbd_t *vbd = NULL; + rb_node_t *rb; long ret = 0; if ( (info->domain != current->domain) && !IS_PRIV(current) ) @@ -522,22 +604,30 @@ long vbd_info(vbd_info_t *info) spin_lock(&p->vbd_lock); - for ( v = p->vbdtab[HSH(info->vdevice)]; v != NULL; v = v->next ) - if ( v->vdevice == info->vdevice ) - break; + rb = p->vbd_rb.rb_node; + while ( rb != NULL ) + { + vbd = rb_entry(rb, vbd_t, rb); + if ( info->vdevice < vbd->vdevice ) + rb = rb->rb_left; + else if ( info->vdevice > vbd->vdevice ) + rb = rb->rb_right; + else + break; + } - if ( v == NULL ) + if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != info->vdevice) ) { DPRINTK("vbd_info attempted on non-existent VBD.\n"); ret = -EINVAL; goto out; } - info->mode = v->mode; + info->mode = vbd->mode; info->nextents = 0; extents = info->extents; - for ( x = v->extents; x != NULL; x = x->next ) + for ( x = vbd->extents; x != NULL; x = x->next ) { if ( info->nextents == info->maxextents ) break; @@ -561,26 +651,34 @@ long vbd_info(vbd_info_t *info) int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation) { xen_extent_le_t *x; - vbd_t *v; + vbd_t *vbd; + rb_node_t *rb; unsigned long sec_off, nr_secs; spin_lock(&p->vbd_lock); - for ( v = p->vbdtab[HSH(pseg->dev)]; v != NULL; v = v->next ) - if ( v->vdevice == pseg->dev ) - goto found; + rb = 
p->vbd_rb.rb_node; + while ( rb != NULL ) + { + vbd = rb_entry(rb, vbd_t, rb); + if ( pseg->dev < vbd->vdevice ) + rb = rb->rb_left; + else if ( pseg->dev > vbd->vdevice ) + rb = rb->rb_right; + else + goto found; + } - if ( unlikely(!IS_PRIV(p)) ) - DPRINTK("vbd_translate; domain %d attempted to access " - "non-existent VBD.\n", p->domain); + DPRINTK("vbd_translate; domain %d attempted to access " + "non-existent VBD.\n", p->domain); spin_unlock(&p->vbd_lock); return -ENODEV; found: - if ( ((operation == READ) && !VBD_CAN_READ(v)) || - ((operation == WRITE) && !VBD_CAN_WRITE(v)) ) + if ( ((operation == READ) && !VBD_CAN_READ(vbd)) || + ((operation == WRITE) && !VBD_CAN_WRITE(vbd)) ) { spin_unlock(&p->vbd_lock); return -EACCES; @@ -592,7 +690,7 @@ int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation) */ sec_off = pseg->sector_number; nr_secs = pseg->nr_sects; - for ( x = v->extents; x != NULL; x = x->next ) + for ( x = vbd->extents; x != NULL; x = x->next ) { if ( sec_off < x->extent.nr_sectors ) { diff --git a/xen/drivers/ide/ide-xeno.c b/xen/drivers/ide/ide-xeno.c index 005274dfe6..1fc3ce37f6 100644 --- a/xen/drivers/ide/ide-xeno.c +++ b/xen/drivers/ide/ide-xeno.c @@ -6,9 +6,7 @@ #include #include -#define NR_IDE_DEVS 20 - -static kdev_t ide_devs[NR_IDE_DEVS] = { +static kdev_t ide_devs[] = { MKDEV(IDE0_MAJOR, 0), MKDEV(IDE0_MAJOR, 64), /* hda, hdb */ MKDEV(IDE1_MAJOR, 0), MKDEV(IDE1_MAJOR, 64), /* hdc, hdd */ MKDEV(IDE2_MAJOR, 0), MKDEV(IDE2_MAJOR, 64), /* hde, hdf */ @@ -21,63 +19,43 @@ static kdev_t ide_devs[NR_IDE_DEVS] = { MKDEV(IDE9_MAJOR, 0), MKDEV(IDE9_MAJOR, 64) /* hds, hdt */ }; - - - -int ide_probe_devices(xen_disk_info_t* xdi) +void ide_probe_devices(xen_disk_info_t* xdi) { - int loop, ret = 0; - unsigned int unit; - unsigned short type; + int i, unit; ide_drive_t *drive; - xen_disk_t cur_disk; + xen_disk_t *xd = &xdi->disks[xdi->count]; - for ( loop = 0; loop < MAX_HWIFS; loop++ ) + for ( i = 0; i < MAX_HWIFS; i++ ) { - ide_hwif_t 
*hwif = &ide_hwifs[loop]; + ide_hwif_t *hwif = &ide_hwifs[i]; if ( !hwif->present ) continue; for ( unit = 0; unit < MAX_DRIVES; unit++ ) { drive = &hwif->drives[unit]; - if ( !drive->present ) continue; - - - /* SMH: don't ever expect this to happen, hence verbose printk */ - if ( xdi->count == xdi->max ) { - printk("ide_probe_devices: out of space for probe.\n"); - return -ENOMEM; - } + if ( !drive->present ) + continue; - - - /* SMH: we export 'raw' linux device numbers to domain 0 */ - cur_disk.device = ide_devs[(loop * MAX_DRIVES) + unit]; + if ( xdi->count == xdi->max ) + BUG(); - /* - ** NB: we use the ide 'media' field (ide_disk, ide_cdrom, etc) - ** as our 'type' field (XD_TYPE_DISK, XD_TYPE_CDROM, etc). - ** Hence must ensure these are kept in sync. - */ - cur_disk.info = (type = drive->media); - if(type == XD_TYPE_CDROM) - cur_disk.info |= XD_FLAG_RO; + /* We export 'raw' linux device numbers to domain 0. */ + xd->device = ide_devs[(i * MAX_DRIVES) + unit]; - cur_disk.capacity = current_capacity(drive); - cur_disk.domain = 0; /* 'physical' disks belong to domain 0 */ + /* + * NB: we use the ide 'media' field (ide_disk, ide_cdrom, etc) as + * our 'type' field (XD_TYPE_DISK, XD_TYPE_CDROM, etc). Hence must + * ensure these are kept in sync. 
+ */ + if ( (xd->info = drive->media) == XD_TYPE_CDROM ) + xd->info |= XD_FLAG_RO; - /* Now copy into relevant part of user-space buffer */ - if((ret = copy_to_user(xdi->disks + xdi->count, &cur_disk, - sizeof(xen_disk_t))) < 0) { - printk("ide_probe_devices: copy_to_user failed [rc=%d]\n", - ret); - return ret; - } + xd->capacity = current_capacity(drive); + xd->domain = 0; xdi->count++; + xd++; } } - - return ret; } diff --git a/xen/drivers/scsi/sd.c b/xen/drivers/scsi/sd.c index 046e2e55ed..ab0ecff05b 100644 --- a/xen/drivers/scsi/sd.c +++ b/xen/drivers/scsi/sd.c @@ -32,19 +32,10 @@ #include #include - -/* #include */ -/* #include */ #include -/* #include */ -/* #include */ #include -/* #include */ -/* #include */ #include -/* #include */ - #include #include #include @@ -1322,9 +1313,7 @@ static void sd_finish() ** */ -#define NR_SCSI_DEVS 16 - -static kdev_t scsi_devs[NR_SCSI_DEVS] = { +static kdev_t scsi_devs[] = { MKDEV(SCSI_DISK0_MAJOR, 0), MKDEV(SCSI_DISK0_MAJOR, 16), /* sda, sdb */ MKDEV(SCSI_DISK0_MAJOR, 32), MKDEV(SCSI_DISK0_MAJOR, 48), /* sdc, sdd */ MKDEV(SCSI_DISK0_MAJOR, 64), MKDEV(SCSI_DISK0_MAJOR, 80), /* sde, sdf */ @@ -1336,39 +1325,29 @@ static kdev_t scsi_devs[NR_SCSI_DEVS] = { }; -int scsi_probe_devices(xen_disk_info_t *xdi) +void scsi_probe_devices(xen_disk_info_t *xdi) { + int i; Scsi_Disk *sd; - xen_disk_t cur_disk; - int i, ret; + xen_disk_t *xd = &xdi->disks[xdi->count]; for ( sd = rscsi_disks, i = 0; i < sd_template.dev_max; i++, sd++ ) { - if ( sd->device == NULL ) continue; + if ( sd->device == NULL ) + continue; - /* SMH: don't ever expect this to happen, hence verbose printk */ - if ( xdi->count == xdi->max ) { - printk("scsi_probe_devices: out of space for probe.\n"); - return -ENOMEM; - } + if ( xdi->count == xdi->max ) + BUG(); + + /* We export 'raw' linux device numbers to domain 0. 
*/ + xd->device = scsi_devs[i]; + xd->info = XD_TYPE_DISK; /* XXX should determine properly */ + xd->capacity = sd->capacity; + xd->domain = 0; - /* SMH: we export 'raw' linux device numbers to domain 0 */ - cur_disk.device = scsi_devs[i]; - cur_disk.info = XD_TYPE_DISK; // XXX SMH: should determine properly - cur_disk.capacity = sd->capacity; - cur_disk.domain = 0; // 'physical' disks belong to dom0 - - /* Now copy into relevant part of user-space buffer */ - if((ret = copy_to_user(xdi->disks + xdi->count, &cur_disk, - sizeof(xen_disk_t))) < 0) { - printk("scsi_probe_devices: copy_to_user failed [rc=%d]\n", ret); - return ret; - } - xdi->count++; + xd++; } - - return 0; } diff --git a/xen/include/xeno/multiboot.h b/xen/include/xeno/multiboot.h index bdf313262b..4a68f31a0b 100644 --- a/xen/include/xeno/multiboot.h +++ b/xen/include/xeno/multiboot.h @@ -26,7 +26,7 @@ #define MULTIBOOT_BOOTLOADER_MAGIC 0x2BADB002 /* The symbol table for a.out. */ -typedef struct aout_symbol_table +typedef struct { unsigned long tabsize; unsigned long strsize; @@ -35,7 +35,7 @@ typedef struct aout_symbol_table } aout_symbol_table_t; /* The section header table for ELF. */ -typedef struct elf_section_header_table +typedef struct { unsigned long num; unsigned long size; @@ -44,7 +44,7 @@ typedef struct elf_section_header_table } elf_section_header_table_t; /* The Multiboot information. */ -typedef struct multiboot_info +typedef struct { unsigned long flags; unsigned long mem_lower; @@ -63,7 +63,7 @@ typedef struct multiboot_info } multiboot_info_t; /* The module structure. */ -typedef struct module +typedef struct { unsigned long mod_start; unsigned long mod_end; @@ -73,7 +73,7 @@ typedef struct module /* The memory map. Be careful that the offset 0 is base_addr_low but no size. 
*/ -typedef struct memory_map +typedef struct { unsigned long size; unsigned long base_addr_low; diff --git a/xen/include/xeno/rbtree.h b/xen/include/xeno/rbtree.h new file mode 100644 index 0000000000..5d77f1ca28 --- /dev/null +++ b/xen/include/xeno/rbtree.h @@ -0,0 +1,134 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/include/linux/rbtree.h + + To use rbtrees you'll have to implement your own insert and search cores. + This lets us avoid callbacks, which would drop performance dramatically. + I know it's not the cleaner way, but in C (not in C++) to get + performances and genericity... + + Some example of insert and search follows here. The search is a plain + normal search over an ordered tree. The insert instead must be implemented + in two steps: as first thing the code must insert the element in + order as a red leaf in the tree, then the support library function + rb_insert_color() must be called. Such function will do the + not trivial work to rebalance the rbtree if necessary.
+ +----------------------------------------------------------------------- +static inline struct page * rb_search_page_cache(struct inode * inode, + unsigned long offset) +{ + rb_node_t * n = inode->i_rb_page_cache.rb_node; + struct page * page; + + while (n) + { + page = rb_entry(n, struct page, rb_page_cache); + + if (offset < page->offset) + n = n->rb_left; + else if (offset > page->offset) + n = n->rb_right; + else + return page; + } + return NULL; +} + +static inline struct page * __rb_insert_page_cache(struct inode * inode, + unsigned long offset, + rb_node_t * node) +{ + rb_node_t ** p = &inode->i_rb_page_cache.rb_node; + rb_node_t * parent = NULL; + struct page * page; + + while (*p) + { + parent = *p; + page = rb_entry(parent, struct page, rb_page_cache); + + if (offset < page->offset) + p = &(*p)->rb_left; + else if (offset > page->offset) + p = &(*p)->rb_right; + else + return page; + } + + rb_link_node(node, parent, p); + + return NULL; +} + +static inline struct page * rb_insert_page_cache(struct inode * inode, + unsigned long offset, + rb_node_t * node) +{ + struct page * ret; + if ((ret = __rb_insert_page_cache(inode, offset, node))) + goto out; + rb_insert_color(node, &inode->i_rb_page_cache); + out: + return ret; +} +----------------------------------------------------------------------- +*/ + +#ifndef _LINUX_RBTREE_H +#define _LINUX_RBTREE_H + +#include +#include +#include + +typedef struct rb_node_s +{ + struct rb_node_s * rb_parent; + int rb_color; +#define RB_RED 0 +#define RB_BLACK 1 + struct rb_node_s * rb_right; + struct rb_node_s * rb_left; +} +rb_node_t; + +typedef struct rb_root_s +{ + struct rb_node_s * rb_node; +} +rb_root_t; + +#define RB_ROOT (rb_root_t) { NULL, } +#define rb_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) + +extern void rb_insert_color(rb_node_t *, rb_root_t *); +extern void rb_erase(rb_node_t *, rb_root_t *); + +static inline void rb_link_node(rb_node_t * node, rb_node_t 
* parent, rb_node_t ** rb_link) +{ + node->rb_parent = parent; + node->rb_color = RB_RED; + node->rb_left = node->rb_right = NULL; + + *rb_link = node; +} + +#endif /* _LINUX_RBTREE_H */ diff --git a/xen/include/xeno/sched.h b/xen/include/xeno/sched.h index 5ffbf49ff1..22a446c798 100644 --- a/xen/include/xeno/sched.h +++ b/xen/include/xeno/sched.h @@ -15,6 +15,7 @@ #include #include #include +#include #define MAX_DOMAIN_NAME 16 @@ -126,8 +127,8 @@ struct task_struct BLK_RING_IDX blk_resp_prod; /* (private version of) response producer */ struct list_head blkdev_list; spinlock_t blk_ring_lock; - vbd_t *vbdtab[VBD_HTAB_SZ]; /* mapping from 16-bit vdevices to vbds */ - spinlock_t vbd_lock; + rb_root_t vbd_rb; /* mapping from 16-bit vdevices to vbds */ + spinlock_t vbd_lock; /* protects VBD mapping */ /* VM */ struct mm_struct mm; diff --git a/xen/include/xeno/vbd.h b/xen/include/xeno/vbd.h index 6734549418..6f1b6a04e5 100644 --- a/xen/include/xeno/vbd.h +++ b/xen/include/xeno/vbd.h @@ -9,27 +9,40 @@ #include #include -/* an entry in a list of xen_extent's */ +#include + +/* An entry in a list of xen_extents. */ typedef struct _xen_extent_le { - xen_extent_t extent; /* an individual extent */ + xen_extent_t extent; /* an individual extent */ struct _xen_extent_le *next; /* and a pointer to the next */ } xen_extent_le_t; - /* -** This is what a vbd looks like from the pov of xen: essentially a list -** of xen_extents which a given domain refers to by a particular 16bit id. -** Each domain has a hash table to map from these to the relevant VBD. -*/ + * This is what a vbd looks like from the p.o.v. of xen: essentially a list of + * xen_extents which a given domain refers to by a particular 16bit id. Each + * domain has a lookup structure to map from these to the relevant VBD. 
+ */ typedef struct _vbd { unsigned short vdevice; /* what the domain refers to this vbd as */ - unsigned short mode; /* VBD_MODE_{READONLY,READWRITE} */ + unsigned char mode; /* VBD_MODE_{R,W} */ + unsigned char type; /* XD_TYPE_xxx */ xen_extent_le_t *extents; /* list of xen_extents making up this vbd */ - struct _vbd *next; /* for chaining in the hash table */ + rb_node_t rb; /* for linking into R-B tree lookup struct */ } vbd_t; -#define VBD_HTAB_SZ 16 /* # entries in the vbd hash table. */ +/* + * Internal forms of 'vbd_create' and 'vbd_grow'. Used when setting up real + * physical device access for domain 0. + */ +long __vbd_create(struct task_struct *p, + unsigned short vdevice, + unsigned char mode, + unsigned char type); +long __vbd_grow(struct task_struct *p, + unsigned short vdevice, + xen_extent_t *extent); +/* This is the main API, accessible from guest OSes. */ long vbd_create(vbd_create_t *create_params); long vbd_grow(vbd_grow_t *grow_params); long vbd_shrink(vbd_shrink_t *shrink_params); diff --git a/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_block.c b/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_block.c index d27797b41d..47aac7e082 100644 --- a/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_block.c +++ b/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_block.c @@ -3,11 +3,10 @@ * * Xenolinux virtual block-device driver. * + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge */ -/* Some modifications to the original by Mark A. Williamson and (C) Intel - * Research Cambridge */ - #include "xl_block.h" #include #include @@ -31,9 +30,6 @@ static blk_ring_t *blk_ring; static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */ static BLK_RING_IDX req_prod; /* Private request producer.
*/ -#define XDI_MAX 64 -xen_disk_info_t xlblk_disk_info; /* information about our disks/VBDs */ - /* We plug the I/O ring if the driver is suspended or if the ring is full. */ #define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \ (state != STATE_ACTIVE)) @@ -66,6 +62,25 @@ static inline void signal_requests_to_xen(void) } +/* + * xlblk_update_int/update_vbds_task - handle VBD update events from Xen + * + * Schedule a task for keventd to run, which will update the VBDs and perform + * the corresponding updates to our view of VBD state, so the XenoLinux will + * respond to changes / additions / deletions to the set of VBDs automatically. + */ +static struct tq_struct update_tq; +static void update_vbds_task(void *unused) +{ + xlvbd_update_vbds(); +} +static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + update_tq.routine = update_vbds_task; + schedule_task(&update_tq); +} + + int xenolinux_block_open(struct inode *inode, struct file *filep) { short xldev = inode->i_rdev; @@ -100,9 +115,9 @@ int xenolinux_block_open(struct inode *inode, struct file *filep) } } - /* RACE: need locking SMP / pre-emptive kernels */ + /* Update of usage count is protected by per-device semaphore. */ disk->usage++; - DPRINTK("xenolinux_block_open\n"); + return 0; } @@ -110,17 +125,16 @@ int xenolinux_block_open(struct inode *inode, struct file *filep) int xenolinux_block_release(struct inode *inode, struct file *filep) { xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); - disk->usage--; /* RACE: need locking for SMP / pre-emptive kernels */ - DPRINTK("xenolinux_block_release\n"); - - /* A reference to a disk has been dropped: may enable more changes to VBDs to - * go through (currently don't do any updates while references are held), so - * we run the update magic again. Could equally well schedule this update for - * keventd to run, or use a flag so we only update at this point if we think - * something (relevant) may have changed.
- * Keventd has the advantage that it'll serialise executions of this function - * - there's a race here for SMP / pre-emptive kernels */ - xlvbd_update_vbds(); + + /* + * When usage drops to zero it may allow more VBD updates to occur. + * Update of usage count is protected by a per-device semaphore. + */ + if ( --disk->usage == 0 ) + { + update_tq.routine = update_vbds_task; + schedule_task(&update_tq); + } return 0; } @@ -212,34 +226,36 @@ int xenolinux_block_check(kdev_t dev) return 0; } -/* MAW - leaving this as it is for now. As long as we're responding to the VBD - * update events from the hypervisor, I figure this will still do what it's - * meant to do :-) */ int xenolinux_block_revalidate(kdev_t dev) { - struct gendisk *gd = get_gendisk(dev); - xl_disk_t *disk = xldev_to_xldisk(dev); - unsigned long flags, capacity = gd->part[MINOR(dev)].nr_sects; - int i, disk_nr = MINOR(dev) >> gd->minor_shift; + struct block_device *bd; + struct gendisk *gd; + xl_disk_t *disk; + unsigned long flags, capacity; + int i, rc = 0, disk_nr = MINOR(dev) >> gd->minor_shift; - DPRINTK("xenolinux_block_revalidate: %d\n", dev); + if ( (bd = bdget(dev)) == NULL ) + return -EINVAL; /* - * We didn't construct this VBD by reading a partition table. This - * function can only do bad things to us. + * Update of partition info, and check of usage count, is protected + * by the per-block-device semaphore. */ - if ( capacity == 0 ) - return -EINVAL; + down(&bd->bd_sem); - spin_lock_irqsave(&io_request_lock, flags); - if ( disk->usage > 1 ) + if ( ((gd = get_gendisk(dev)) == NULL) || + ((disk = xldev_to_xldisk(dev)) == NULL) || + ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) ) { - spin_unlock_irqrestore(&io_request_lock, flags); - return -EBUSY; + rc = -EINVAL; + goto out; } - spin_unlock_irqrestore(&io_request_lock, flags); - /* RACE? 
is it OK that we give up the lock */ + if ( disk->usage > 1 ) + { + rc = -EBUSY; + goto out; + } for ( i = gd->max_p - 1; i >= 0; i-- ) { @@ -249,12 +265,12 @@ int xenolinux_block_revalidate(kdev_t dev) gd->sizes[MINOR(dev+i)] = 0; } - /* shouldn't need to revalidate VBDs here as it's done automatically when - * we get the VBD update event from Xen */ - grok_partitions(gd, disk_nr, gd->max_p, capacity); - return 0; + out: + up(&bd->bd_sem); + bdput(bd); + return rc; } @@ -449,43 +465,6 @@ static void kick_pending_request_queues(void) } -/** - * do_update_vbds - called in process context by keventd to update VBDs - * @arg: dummy argument to fit schedule_task API - * - * When this function is run, it simply calls through to xlvbd_update_vbds in - * update the VBD state information. The argument is ignored - it's only there - * because the API for scheduling with keventd requires it. - */ -void do_update_vbds(void * arg) -{ - DPRINTK("xl_block.c::do_update_vbds() - called\n"); - xlvbd_update_vbds(); -} - -/* this data is needed to register do_update_vbds() as a task for keventd */ -static struct tq_struct update = { - .sync = 0, - .routine = do_update_vbds, - .data = 0 -}; - -/** - * xlblk_update_int - handle VBD update events from Xen - * - * This function schedules a task for keventd to run, which will update the - * VBDs and perform the corresponding updates to our view of VBD state, so the - * XenoLinux will respond to changes / additions / deletions to the set of VBDs - * automatically. 
- */ -static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs) -{ - DPRINTK("xl_block.c::xlblk_update_int() - called\n"); - - schedule_task(&update); -} - - static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs) { BLK_RING_IDX i; @@ -530,7 +509,6 @@ static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs) } - static void reset_xlblk_interface(void) { block_io_op_t op; @@ -556,7 +534,6 @@ static void reset_xlblk_interface(void) int __init xlblk_init(void) { int error; - block_io_op_t op; reset_xlblk_interface(); @@ -577,31 +554,7 @@ int __init xlblk_init(void) goto fail; } - /* Setup our [empty] disk information structure */ - xlblk_disk_info.max = XDI_MAX; - xlblk_disk_info.disks = kmalloc(XDI_MAX * sizeof(xen_disk_t), GFP_KERNEL); - xlblk_disk_info.count = 0; - - /* Probe for disk information. */ - memset(&op, 0, sizeof(op)); - op.cmd = BLOCK_IO_OP_VBD_PROBE; - op.u.probe_params.domain = 0; - memcpy(&op.u.probe_params.xdi, &xlblk_disk_info, sizeof(xlblk_disk_info)); - - error = HYPERVISOR_block_io_op(&op); - - if ( error ) - { - printk(KERN_ALERT "Could not probe disks (%d)\n", error); - free_irq(XLBLK_RESPONSE_IRQ, NULL); - goto fail; - } - - /* copy back the [updated] count parameter */ - xlblk_disk_info.count = op.u.probe_params.xdi.count; - - /* Pass the information to our virtual block device susbystem. */ - xlvbd_init(&xlblk_disk_info); + (void)xlvbd_init(); return 0; @@ -609,6 +562,7 @@ int __init xlblk_init(void) return error; } + static void __exit xlblk_cleanup(void) { xlvbd_cleanup(); diff --git a/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_block.h b/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_block.h index d856dd7601..c735a6ec44 100644 --- a/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_block.h +++ b/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_block.h @@ -52,7 +52,6 @@ typedef struct xl_disk { int usage; } xl_disk_t; -/* Generic layer. 
*/ extern int xenolinux_control_msg(int operration, char *buffer, int size); extern int xenolinux_block_open(struct inode *inode, struct file *filep); extern int xenolinux_block_release(struct inode *inode, struct file *filep); @@ -62,22 +61,22 @@ extern int xenolinux_block_check(kdev_t dev); extern int xenolinux_block_revalidate(kdev_t dev); extern void do_xlblk_request (request_queue_t *rq); -extern xen_disk_info_t xlblk_disk_info; /* this is really in xl_block.c */ -extern void xlvbd_update_vbds(void); /* this is really in xl_vbd.c */ +extern void xlvbd_update_vbds(void); static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev) { struct gendisk *gd = get_gendisk(xldev); - - if(!gd) return NULL; - + + if ( gd == NULL ) + return NULL; + return (xl_disk_t *)gd->real_devices + (MINOR(xldev) >> gd->minor_shift); } /* Virtual block-device subsystem. */ -extern int xlvbd_init(xen_disk_info_t *xdi); +extern int xlvbd_init(void); extern void xlvbd_cleanup(void); #endif /* __XL_BLOCK_H__ */ diff --git a/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_vbd.c b/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_vbd.c index f9dec059aa..795ae3c3aa 100644 --- a/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_vbd.c +++ b/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_vbd.c @@ -3,11 +3,10 @@ * * Xenolinux virtual block-device driver (xvd). * + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge */ -/* Some modifications to the original by Mark A. 
Williamson and (C) Intel - * Research Cambridge */ - #include "xl_block.h" #include @@ -30,13 +29,9 @@ #define XLSCSI_PARTN_SHIFT 4 /* amount to shift minor to get 'real' minor */ #define XLSCSI_MAX_PART (1 << XLSCSI_PARTN_SHIFT) /* minors per scsi vbd */ -#define XLVBD_PARTN_SHIFT 6 /* amount to shift minor to get 'real' minor */ +#define XLVBD_PARTN_SHIFT 4 /* amount to shift minor to get 'real' minor */ #define XLVBD_MAX_PART (1 << XLVBD_PARTN_SHIFT) /* minors per 'other' vbd */ -/* Used to record data in vbd_state[] and detect changes in configuration */ -#define VBD_NODEV 1 -#define VBD_KNOWN 2 - /* The below are for the generic drivers/block/ll_rw_block.c code. */ static int xlide_blksize_size[256]; static int xlide_hardsect_size[256]; @@ -48,6 +43,11 @@ static int xlvbd_blksize_size[256]; static int xlvbd_hardsect_size[256]; static int xlvbd_max_sectors[256]; +/* Information from Xen about our VBDs. */ +#define MAX_VBDS 64 +static int nr_vbds; +static xen_disk_t *vbd_info; + static struct block_device_operations xlvbd_block_fops = { open: xenolinux_block_open, @@ -57,10 +57,29 @@ static struct block_device_operations xlvbd_block_fops = revalidate: xenolinux_block_revalidate, }; - /* hold state about for all possible VBDs for use in handling updates */ -static char vbd_state[65536]; +static int xlvbd_get_vbd_info(xen_disk_t *disk_info) +{ + int error; + block_io_op_t op; + + /* Probe for disk information. 
*/ + memset(&op, 0, sizeof(op)); + op.cmd = BLOCK_IO_OP_VBD_PROBE; + op.u.probe_params.domain = 0; + op.u.probe_params.xdi.max = MAX_VBDS; + op.u.probe_params.xdi.disks = disk_info; + op.u.probe_params.xdi.count = 0; + + if ( (error = HYPERVISOR_block_io_op(&op)) != 0 ) + { + printk(KERN_ALERT "Could not probe disks (%d)\n", error); + return -1; + } + + return op.u.probe_params.xdi.count; +} -/** +/* * xlvbd_init_device - initialise a VBD device * @disk: a xen_disk_t describing the VBD * @@ -71,23 +90,37 @@ static char vbd_state[65536]; * corruption does not occur. Also, devices that are in use should not have * their details updated. This is the caller's responsibility. */ -int xlvbd_init_device(xen_disk_t *disk) +static int xlvbd_init_device(xen_disk_t *xd) { - int device = disk->device; + int device = xd->device; int major = MAJOR(device); int minor = MINOR(device); int is_ide = IDE_DISK_MAJOR(major); /* is this an ide device? */ int is_scsi= SCSI_BLK_MAJOR(major); /* is this a scsi device? */ - int partno; - char * major_name; - int max_part; - + char *major_name; struct gendisk *gd; - int result; - int j; + struct block_device *bd; + xl_disk_t *disk; + int i, rc = 0, max_part, partno; unsigned char buf[64]; + if ( (bd = bdget(device)) == NULL ) + return -1; + + /* + * Update of partition info, and check of usage count, is protected + * by the per-block-device semaphore. 
+ */ + down(&bd->bd_sem); + + if ( ((disk = xldev_to_xldisk(device)) != NULL) && (disk->usage != 0) ) + { + printk(KERN_ALERT "VBD update failed - in use [dev=%x]\n", device); + rc = -1; + goto out; + } + if ( is_ide ) { major_name = XLIDE_MAJOR_NAME; @@ -108,11 +141,11 @@ int xlvbd_init_device(xen_disk_t *disk) if ( (gd = get_gendisk(device)) == NULL ) { - result = register_blkdev(major, major_name, &xlvbd_block_fops); - if ( result < 0 ) + rc = register_blkdev(major, major_name, &xlvbd_block_fops); + if ( rc < 0 ) { printk(KERN_ALERT "XL VBD: can't get major %d\n", major); - return -1; /* XXX make this sane one day */ + goto out; } if ( is_ide ) @@ -199,7 +232,7 @@ int xlvbd_init_device(xen_disk_t *disk) blk_size[major] = gd->sizes; } - if ( XD_READONLY(disk->info) ) + if ( XD_READONLY(xd->info) ) set_device_ro(device, 1); gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XENO; @@ -214,14 +247,12 @@ int xlvbd_init_device(xen_disk_t *disk) if ( gd->sizes[minor & ~(max_part-1)] != 0 ) { kdev_t dev = device & ~(max_part-1); - for ( j = max_part - 1; j >= 0; j-- ) + for ( i = max_part - 1; i >= 0; i-- ) { - invalidate_device(dev+j, 1); - gd->part[MINOR(dev+j)].start_sect = 0; - gd->part[MINOR(dev+j)].nr_sects = 0; - gd->sizes[MINOR(dev+j)] = 0; - - vbd_state[dev+j] &= ~VBD_KNOWN; + invalidate_device(dev+i, 1); + gd->part[MINOR(dev+i)].start_sect = 0; + gd->part[MINOR(dev+i)].nr_sects = 0; + gd->sizes[MINOR(dev+i)] = 0; } printk(KERN_ALERT "Virtual partitions found for /dev/%s - ignoring any " @@ -231,31 +262,27 @@ int xlvbd_init_device(xen_disk_t *disk) /* Need to skankily setup 'partition' information */ gd->part[minor].start_sect = 0; - gd->part[minor].nr_sects = disk->capacity; - gd->sizes[minor] = disk->capacity; + gd->part[minor].nr_sects = xd->capacity; + gd->sizes[minor] = xd->capacity; gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS; - - vbd_state[device] |= VBD_KNOWN; } else { /* Some final fix-ups depending on the device type */ - switch ( 
XD_TYPE(disk->info) ) + switch ( XD_TYPE(xd->info) ) { case XD_TYPE_CDROM: case XD_TYPE_FLOPPY: case XD_TYPE_TAPE: - gd->part[minor].nr_sects = disk->capacity; - gd->sizes[minor] = disk->capacity>>(BLOCK_SIZE_BITS-9); + gd->part[minor].nr_sects = xd->capacity; + gd->sizes[minor] = xd->capacity>>(BLOCK_SIZE_BITS-9); gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE; printk(KERN_ALERT "Skipping partition check on %s /dev/%s\n", - XD_TYPE(disk->info)==XD_TYPE_CDROM ? "cdrom" : - (XD_TYPE(disk->info)==XD_TYPE_TAPE ? "tape" : + XD_TYPE(xd->info)==XD_TYPE_CDROM ? "cdrom" : + (XD_TYPE(xd->info)==XD_TYPE_TAPE ? "tape" : "floppy"), disk_name(gd, MINOR(device), buf)); - - vbd_state[device] |= VBD_KNOWN; /* remember the VBD is there now */ break; case XD_TYPE_DISK: @@ -268,134 +295,172 @@ int xlvbd_init_device(xen_disk_t *disk) break; } register_disk(gd, device, gd->max_p, &xlvbd_block_fops, - disk->capacity); - - vbd_state[device] |= VBD_KNOWN; /* remember the VBD is there now */ - + xd->capacity); break; default: printk(KERN_ALERT "XenoLinux: unknown device type %d\n", - XD_TYPE(disk->info)); + XD_TYPE(xd->info)); break; } } - printk(KERN_ALERT "XenoLinux Virtual Block Device Driver " - "installed [device: %04x]\n", device); - - return 0; + out: + up(&bd->bd_sem); + bdput(bd); + return rc; } -/** - * xlvbd_remove - see if a VBD should be removed and do so if appropriate +/* + * xlvbd_remove_device - remove a device node if possible * @device: numeric device ID * * Updates the gendisk structure and invalidates devices. * * This is OK for now but in future, should perhaps consider where this should - * deallocate gendisks / unregister devices? + * deallocate gendisks / unregister devices. */ -int xlvbd_remove(int device) +static int xlvbd_remove_device(int device) { - int major = MAJOR(device); - int minor = MINOR(device); - int is_ide = IDE_DISK_MAJOR(major); /* is this an ide device? */ - int is_scsi= SCSI_BLK_MAJOR(major); /* is this a scsi device? 
*/ - int i; /* loop counter */ - int partno; - int max_part; - char * major_name; - + int i, rc = 0, max_part, minor = MINOR(device); struct gendisk *gd; + struct block_device *bd; + xl_disk_t *disk; - DPRINTK("xl_vbd.c::xlvbd_remove() - Removing a VBD\n"); - - /* if device is in use then we shouldn't change its settings */ - if(xldev_to_xldisk(device)->usage) - { - DPRINTK("xl_vbd.c::xlvbd_remove() - VBD in use, could not remove\n"); - printk(KERN_ALERT "Removing XenoLinux VBD failed - " - "in use [device: %x]\n", device); + if ( (bd = bdget(device)) == NULL ) return -1; - } - if((gd = get_gendisk(device)) == NULL) + /* + * Update of partition info, and check of usage count, is protected + * by the per-block-device semaphore. + */ + down(&bd->bd_sem); + + if ( ((gd = get_gendisk(device)) == NULL) || + ((disk = xldev_to_xldisk(device)) == NULL) ) + BUG(); + + if ( disk->usage != 0 ) { - printk(KERN_ALERT - "xl_vbd.c::xlvbd_remove() - ERROR could not get gendisk\n"); - - return -1; + printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device); + rc = -1; + goto out; } - if ( is_ide ) - { - major_name = XLIDE_MAJOR_NAME; - max_part = XLIDE_MAX_PART; - } - else if ( is_scsi ) - { - major_name = XLSCSI_MAJOR_NAME; - max_part = XLSCSI_MAX_PART; + if ( IDE_DISK_MAJOR(MAJOR(device)) ) + max_part = XLIDE_MAX_PART; + else if ( SCSI_BLK_MAJOR(MAJOR(device)) ) + max_part = XLSCSI_MAX_PART; + else + max_part = XLVBD_MAX_PART; + + if ( (minor & (max_part-1)) != 0 ) + { + /* 1: The VBD is mapped to a partition rather than a whole unit. */ + invalidate_device(device, 1); + gd->part[minor].start_sect = 0; + gd->part[minor].nr_sects = 0; + gd->sizes[minor] = 0; + + /* Clear the consists-of-virtual-partitions flag if possible. 
*/ + gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS; + for ( i = 0; i < max_part; i++ ) + if ( gd->sizes[(minor & ~(max_part-1)) + i] != 0 ) + gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS; } else - { - major_name = XLVBD_MAJOR_NAME; - max_part = XLVBD_MAX_PART; + { + /* 2: The VBD is mapped to an entire 'unit'. Clear all partitions. */ + for ( i = max_part - 1; i >= 0; i-- ) + { + invalidate_device(device+i, 1); + gd->part[minor+i].start_sect = 0; + gd->part[minor+i].nr_sects = 0; + gd->sizes[minor+i] = 0; + } } - partno = minor & (max_part - 1); + out: + up(&bd->bd_sem); + bdput(bd); + return rc; +} + +/* + * xlvbd_update_vbds - reprobes the VBD status and performs updates driver + * state. The VBDs need to be updated in this way when the domain is + * initialised and also each time we receive an XLBLK_UPDATE event. + */ +void xlvbd_update_vbds(void) +{ + int i, j, k, old_nr, new_nr; + xen_disk_t *old_info, *new_info, *merged_info; - DPRINTK("Got partno = 0x%x\n", partno); + old_info = vbd_info; + old_nr = nr_vbds; - if(partno) /* if the VBD is mapped to a "partition" device node in Linux */ + new_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL); + if ( unlikely(new_nr = xlvbd_get_vbd_info(new_info)) < 0 ) { - int should_clear_virtpart = 1; /* if this is set true we should clear - * the GENHD_FL_VIRT_PARTNS flag in the - * gendisk */ - - gd->sizes[minor] = 0; + kfree(new_info); + return; + } - for(i = 0; i < max_part; i++) - if(gd->sizes[minor - partno + i]) should_clear_virtpart = 0; - - /* if there aren't any virtual partitions here then clear the flag for - * this unit */ - if(should_clear_virtpart) - { - gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS; + /* + * Final list maximum size is old list + new list. This occurs only when + * old list and new list do not overlap at all, and we cannot yet destroy + * VBDs in the old list because the usage counts are busy. 
+ */ + merged_info = kmalloc((old_nr + new_nr) * sizeof(xen_disk_t), GFP_KERNEL); - DPRINTK("xl_vbd.c::xlvbd_remove() - " - "cleared virtual partition flag\n"); - } - - gd->part[MINOR(device)].start_sect = 0; - gd->part[MINOR(device)].nr_sects = 0; - gd->sizes[MINOR(device)] = 0; - - invalidate_device(device, 1); + /* @i tracks old list; @j tracks new list; @k tracks merged list. */ + i = j = k = 0; - vbd_state[device] &= ~VBD_KNOWN; /* forget VBD was ever there */ - } - else /* the VBD is mapped to a "whole disk drive" device node in Linux */ + while ( (i < old_nr) && (j < new_nr) ) { - for ( i = max_part - 1; i >= 0; i-- ) + if ( old_info[i].device < new_info[j].device ) { - invalidate_device(device+i, 1); - gd->part[MINOR(device+i)].start_sect = 0; - gd->part[MINOR(device+i)].nr_sects = 0; - gd->sizes[MINOR(device+i)] = 0; - - vbd_state[device+i] &= ~VBD_KNOWN; /* forget VBD was ever there */ + if ( xlvbd_remove_device(old_info[i].device) != 0 ) + memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t)); + i++; + } + else if ( old_info[i].device > new_info[j].device ) + { + if ( xlvbd_init_device(&new_info[j]) == 0 ) + memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t)); + j++; + } + else + { + if ( xlvbd_init_device(&new_info[j]) == 0 ) + memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t)); + else + memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t)); + i++; j++; } } - printk(KERN_ALERT "XenoLinux Virtual Block Device removed " - " [device: %04x]\n", device); - return 0; + for ( ; i < old_nr; i++ ) + { + if ( xlvbd_remove_device(old_info[i].device) != 0 ) + memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t)); + } + + for ( ; j < new_nr; j++ ) + { + if ( xlvbd_init_device(&new_info[j]) == 0 ) + memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t)); + } + + vbd_info = merged_info; + nr_vbds = k; + + kfree(old_info); + kfree(new_info); } + /* * Set up all the linux device goop for the virtual block devices (vbd's) that * 
xen tells us about. Note that although from xen's pov VBDs are addressed @@ -404,17 +469,18 @@ int xlvbd_remove(int device) * linux -- this is just for convenience as it means e.g. that the same * /etc/fstab can be used when booting with or without xen. */ -int __init xlvbd_init(xen_disk_info_t *xdi) +int __init xlvbd_init(void) { - int i; /* loop counter */ + int i; + /* + * If compiled as a module, we don't support unloading yet. We therefore + * permanently increment the reference count to disallow it. + */ SET_MODULE_OWNER(&xlvbd_block_fops); + MOD_INC_USE_COUNT; /* Initialize the global arrays. */ - - for( i = 0; i < 65536; i++) - vbd_state[i] = VBD_NODEV; - for ( i = 0; i < 256; i++ ) { /* from the generic ide code (drivers/ide/ide-probe.c, etc) */ @@ -433,186 +499,25 @@ int __init xlvbd_init(xen_disk_info_t *xdi) xlvbd_max_sectors[i] = 128; } - /* - * We need to loop through each major device we've been told about and: - * a) register the appropriate blkdev - * b) setup the indexed-by-major global arrays (blk_size[], - * blksize_size[], hardsect_size[], max_sectors[], read_ahead[]) - * c) setup the block queue + make it sensible - * d) create an appropriate gendisk structure, and - * e) register the gendisk - */ - for ( i = 0; i < xdi->count; i++ ) - { - xlvbd_init_device(&xdi->disks[i]); - } - - return 0; -} - -/** - * xlvbd_update_vbds - reprobes the VBD status and performs updates driver state - * - * The VBDs need to be updated in this way when the domain is initialised and - * also each time we receive an XLBLK_UPDATE event. - * - * The vbd_state array is consistent on entry to and exit from this function but - * not whilst the function runs, so this should not be called re-entrantly. - */ -void xlvbd_update_vbds(void) -{ - int i; /* loop counter */ - int ret; /* return values */ - block_io_op_t op; /* for talking to Xen */ - - xen_disk_info_t *xdi = &xlblk_disk_info; /* pointer to structures in - * xl_block.c */ - - /* Probe for disk information. 
*/ - memset(&op, 0, sizeof(op)); - op.cmd = BLOCK_IO_OP_VBD_PROBE; - op.u.probe_params.domain = 0; - - xdi->count = 0; /* need to keep resetting this to zero because the probe - * will append results after "used" space in the array */ - - memcpy(&op.u.probe_params.xdi, &xlblk_disk_info, sizeof(xlblk_disk_info)); + vbd_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL); + nr_vbds = xlvbd_get_vbd_info(vbd_info); - ret = HYPERVISOR_block_io_op(&op); - - if ( ret ) + if ( nr_vbds < 0 ) { - printk(KERN_ALERT "Could not probe disks (%d)\n", ret); + kfree(vbd_info); + vbd_info = NULL; + nr_vbds = 0; } - - /* copy back the [updated] count parameter */ - xlblk_disk_info.count = op.u.probe_params.xdi.count; - - DPRINTK("Retrieved %d disks\n",op.u.probe_params.xdi.count); - - - for( i = 0; i < 65536; i++ ) - vbd_state[i] |= VBD_NODEV; - - for( i = 0; i < xdi->count; i++ ) - { - int device = xdi->disks[i].device; - xl_disk_t *d; - - vbd_state[device] &= ~VBD_NODEV; - - DPRINTK("Inspecting xen_disk_t: device = %hx, info = %hx, " - "capacity = %lx, domain = %d\n", - xdi->disks[i].device, xdi->disks[i].info, xdi->disks[i].capacity, - xdi->disks[i].domain); - - if(xdi->disks[i].info & XD_FLAG_VIRT) - { - /* RACE: need to fix this for SMP / pre-emptive kernels */ - - d = xldev_to_xldisk(device); - - /* only go on to monkey with this stuff if we successfully got the - * xldisk and it says no-one else is using the disk OR if we didn't - * successfully retrieve the xldisk (so it doesn't exist and nobody - * can be using it), otherwise skip on to the next device */ - if(d != NULL && d->usage > 0) - { - printk(KERN_ALERT "XenoLinux VBD Driver: " - "skipping update in a disk currently in use"); - DPRINTK("Usage = %d\n", d->usage); - continue; /* skip to next device */ - } - - printk(KERN_ALERT "XenoLinux VBD Driver: updating a VBD " - "[device: %x]\n", device); - /* also takes care of any overrides (i.e. 
due to VBDs mapped to - * partitions overriding VBDs mapped to disks) and of registering - * disks */ - xlvbd_init_device(xdi->disks + i); - } - - } - - for( i = 0; i < 65536; i++ ) + else { - switch(vbd_state[i]) - { - case VBD_NODEV | VBD_KNOWN: /* a VBD we knew about before has gone */ - - DPRINTK("About to remove VBD 0x%x\n",i); - - ret = xlvbd_remove(i); - - if(ret) DPRINTK("Failed to remove VBD\n"); - - break; - - case VBD_NODEV: /* there's nothing here and there wasn't anything - * before */ - break; - - case VBD_KNOWN: /* the device is present and it's set up */ - break; - - case 0: /* there's a device present we haven't set up - either - * one of the "non virtual" VBDs or we weren't able to - * update it because it was mounted */ - break; - - default: /* if there's any other weird combination, something - * unexpected is happening */ - printk(KERN_ALERT "xl_vbd.c::xlvbd_update_vbds: BUG - Unknown state " - "when updating VBDs: 0x%x\n", vbd_state[i]); - } + for ( i = 0; i < nr_vbds; i++ ) + xlvbd_init_device(&vbd_info[i]); } + return 0; } -void xlvbd_cleanup(void) -{ - int is_ide, is_scsi, i; - struct gendisk *gd; - char *major_name; - int major; - - for ( major = 0; major < MAX_BLKDEV; major++ ) - { - if ( (gd = get_gendisk(MKDEV(major, 0))) == NULL ) - continue; - - /* - * If this is a 'Xeno' blkdev then at least one unit will have the Xeno - * flag set. - */ - for ( i = 0; i < gd->nr_real; i++ ) - if ( gd->flags[i] & GENHD_FL_XENO ) - break; - if ( i == gd->nr_real ) - continue; - - is_ide = IDE_DISK_MAJOR(major); /* is this an ide device? */ - is_scsi = SCSI_BLK_MAJOR(major); /* is this a scsi device? 
*/ - - blk_cleanup_queue(BLK_DEFAULT_QUEUE(major)); - - if ( is_ide ) - major_name = XLIDE_MAJOR_NAME; - else if ( is_scsi ) - major_name = XLSCSI_MAJOR_NAME; - else - major_name = XLVBD_MAJOR_NAME; - - if ( unregister_blkdev(major, major_name) != 0 ) - printk(KERN_ALERT "XenoLinux Virtual Block Device Driver:" - "major device %04x uninstalled w/ errors\n", major); - - /* XXX shouldn't we remove the gendisk from the kernel linked list and - * deallocate the memory here? */ - } -} #ifdef MODULE module_init(xlvbd_init); -module_exit(xlvbd_cleanup); #endif diff --git a/xenolinux-2.4.24-sparse/include/asm-xeno/hypervisor.h b/xenolinux-2.4.24-sparse/include/asm-xeno/hypervisor.h index 0fbf40c951..064088ff6f 100644 --- a/xenolinux-2.4.24-sparse/include/asm-xeno/hypervisor.h +++ b/xenolinux-2.4.24-sparse/include/asm-xeno/hypervisor.h @@ -230,7 +230,7 @@ static inline int HYPERVISOR_net_io_op(netop_t *op) __asm__ __volatile__ ( TRAP_INSTR : "=a" (ret) : "0" (__HYPERVISOR_net_io_op), - "b" (op) ); + "b" (op) : "memory" ); return ret; } @@ -297,18 +297,18 @@ static inline int HYPERVISOR_network_op(void *network_op) __asm__ __volatile__ ( TRAP_INSTR : "=a" (ret) : "0" (__HYPERVISOR_network_op), - "b" (network_op) ); + "b" (network_op) : "memory" ); return ret; } -static inline int HYPERVISOR_block_io_op(void * block_io_op) +static inline int HYPERVISOR_block_io_op(void *block_io_op) { int ret; __asm__ __volatile__ ( TRAP_INSTR : "=a" (ret) : "0" (__HYPERVISOR_block_io_op), - "b" (block_io_op) ); + "b" (block_io_op) : "memory" ); return ret; } -- 2.30.2